% Generate Panel B in Table 5.
% Rounding 2/sqrt(n) to 0.2 is adopted when computing the local alternative.
% 
% It is advised to run the generalized Bierens max test separately from the other tests, as it may take considerable amount of time.
% Table 5 in Chen et al. (2025) was generated on the high-powered computing system at Columbia University, using distributed seed numbers.
% There may be simulation errors in the final results if different seed numbers are used.

clear
clc

% Designate working directories
dir = '.../Replication package/';
cd(dir);
addpath(genpath(strcat(dir,"Code/src")))
addpath(genpath(strcat(dir,"Datasets")))

% Set seed
seed = 4534546 ;
rng(seed,'twister');

alpha_level = 0.1;            % nominal level of the tests
theta_grid = [0 0.2];
bnd = 5 ;                     % bnd > 0 defines the search space as [-bndbnd]^p, where p = dim(gamma).
bR  = 5000 ;                  % bootstrap replication number used in grid search method.
sz_Gamma = 200000 ;           % number of grids in grid search method.
splits = 1000 ;               % reduce memory requirement
lambda_grid = 1:(-0.1):0 ;    % grid for optimal lambda search.
B = 2 ;

% Load the data
data = double(load("USAY.mat").dataset);
dat = rmmissing([data(:, 6), data(:, 8), lagmatrix(data(:, 3:5), 2), lagmatrix(data(:, 6), 2)]);
Y = dat(:, 1);
X = dat(:, 2);
Z = dat(:, 3:6);

%%%%%%%%%%%%%%%%%%%%
% Part 1: Use 4 IVs
%%%%%%%%%%%%%%%%%%%%
lag = 2;  % use two-period-lagged variables as instruments
[pi, ~, resid_fs] = regress(X, [ones(105,1) Z]);
[pi_res, ~, vhat] = regress(resid_fs, [ones(105,1), Z, Z.^2]);
Xhat = [ones(105,1) Z]*pi;
theta  = regress(Y, [ones(105,1) Xhat]);
uhat = Y - [ones(105,1) X] * theta;
sigma_u = std(uhat);
sigma_v = std(vhat);
rho = 0.8;

Z1 = lagmatrix(Z(:,1:3), 2);

simul_input.T = 105;
simul_input.lag = lag;
simul_input.pi = pi;
simul_input.pi_res = pi_res;
simul_input.rho = rho;
simul_input.theta = theta;
simul_input.sigma_u = sigma_u;
simul_input.sigma_v = sigma_v;
simul_input.X = X;
simul_input.Y = Y;
simul_input.Z1 = Z1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Simulations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
h = @(y, theta) y(:,1) - y(:,2)*theta;
hdot = @(y, theta) -y(:,2);
theta_grid = theta_grid + theta(2);  % translate by theta(2) = -0.0287
cv_AR = chi2inv(1-alpha_level, 4);

R = 100;  % replication number

rec_rej        = zeros(R, length(lambda_grid), length(theta_grid));
rec_bspv       = zeros(R, length(lambda_grid), length(theta_grid));
rec_opt_lambda = zeros(R, length(theta_grid));
rec_rtime_pB   = zeros(R, 1);
rec_opt_pv     = zeros(R, length(theta_grid));
rec_opt_rej    = zeros(R, length(theta_grid));

for rep = 1:R
    data = gen_data_NL(simul_input, 0);
    y = data.y; dy = y - mean(y);
    x = data.x; dx = x - mean(x);
    z = data.z; dz = z - mean(z);

    % Generalized Bierens max test
    u = y - mean(y) - (x - mean(x))*theta_grid;
    test = pen_Bierens_test_GS(u, z, bnd, lambda_grid, "sz_Gamma", sz_Gamma, "splits", splits, ...
                                    "bR", bR, 'gradient', -x+mean(x), 'B', B, 'dm', 1);

    rec_rej(rep, :, :)     = test.rejection;
    rec_bspv(rep, :, :)    = test.bs_pv;
    rec_opt_lambda(rep, :) = test.opt_lambda;
    rec_opt_pv(rep, :)     = test.opt_pv;
    rec_opt_rej(rep, :)    = (test.opt_pv <= alpha_level);
    rec_rtime_pB(rep)      = test.runtime;

    % Dominguez-Lobato
    test_DL = Dominguez_Lobato([dy, dx], z, h, hdot , 1, 'lb', -20, 'ub', 20);
    rec_rej_DL(rep, :) = (theta_grid < test_DL.lowerCI) | (theta_grid > test_DL.upperCI);
    rec_rtime_DL(rep) = test_DL.rtime;

    % AR test (homoscedasticity assumed)
    Tstart = tic();
    P_Z = dz / (dz' * dz) * dz';
    M_Z = eye(size(z,1)) - P_Z;
    AR_stat = zeros(length(theta_grid), 1);
    for i = 1:length(theta_grid)
        AR_stat(i) = (dy - dx * theta_grid(i))' * P_Z * (dy - dx * theta_grid(i)) ./ ((dy - dx * theta_grid(i))' * M_Z * (dy - dx * theta_grid(i)) / (size(z,1) - size(z,2)));
    end
    rec_rej_AR(rep, :) = (AR_stat > cv_AR);
    Tend = toc(Tstart);
    rec_rtime_AR(rep) = Tend;

    % 2SLS
    Tstart = tic();
    theta_2sls = (dx' * P_Z * dx) \ (dx' * P_Z * dy);
    ehat = dy - dx * theta_2sls;
    Vhat = (dx' * P_Z * dx) \ (dx' * P_Z * diag(ehat.^2) * P_Z * dx) / (dx' * P_Z * dx);  % Heteroscedasticity-consistent variance
    se = sqrt(Vhat);
    lowerCI = theta_2sls - se * norminv(1-alpha_level/2);
    upperCI = theta_2sls + se * norminv(1-alpha_level/2);
    rec_rej_2SLS(rep, :) = (theta_grid < lowerCI) | (theta_grid > upperCI);
    Tend = toc(Tstart);
    rec_rtime_2SLS(rep) = Tend;
end

opt_pb_4IV = mean(rec_opt_rej);
unpenalized_pv = squeeze(rec_bspv(:,end,:));
unp_pb_4IV  = mean(unpenalized_pv <= .1);
unp_pb_adj = mean(unpenalized_pv <= .1) - unp_pb_4IV(1) + .1;
unp_pb_adj_4IV = max(min(unp_pb_adj,1),0);
DL_4IV = mean(rec_rej_DL);
AR_4IV = mean(rec_rej_AR);
TSLS_4IV = mean(rec_rej_2SLS);

%%%%%%%%%%%%%%%%%%%%
% Part 2: Use 6 IVs
%%%%%%%%%%%%%%%%%%%%
lag = 4;  % use second-to-fourth-period-lagged variables as instruments
simul_input.lag = lag;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Simulations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cv_AR = chi2inv(1-alpha_level, 6);

R = 100;  % replication number

rec_rej        = zeros(R, length(lambda_grid), length(theta_grid));
rec_bspv       = zeros(R, length(lambda_grid), length(theta_grid));
rec_opt_lambda = zeros(R, length(theta_grid));
rec_rtime_pB   = zeros(R, 1);
rec_opt_pv     = zeros(R, length(theta_grid));
rec_opt_rej    = zeros(R, length(theta_grid));

for rep = 1:R
    data = gen_data_NL(simul_input, 0);
    y = data.y; dy = y - mean(y);
    x = data.x; dx = x - mean(x);
    z = data.z; dz = z - mean(z);

    % Generalized Bierens max test
    u = y - mean(y) - (x - mean(x))*theta_grid;
    test = pen_Bierens_test_GS(u, z, bnd, lambda_grid, "sz_Gamma", sz_Gamma, "splits", splits, ...
                                    "bR", bR, 'gradient', -x+mean(x), 'B', B, 'dm', 1);

    rec_rej(rep, :, :)     = test.rejection;
    rec_bspv(rep, :, :)    = test.bs_pv;
    rec_opt_lambda(rep, :) = test.opt_lambda;
    rec_opt_pv(rep, :)     = test.opt_pv;
    rec_opt_rej(rep, :)    = (test.opt_pv <= alpha_level);
    rec_rtime_pB(rep)      = test.runtime;

    % Dominguez-Lobato
    test_DL = Dominguez_Lobato([dy, dx], z, h, hdot , 1, 'lb', -20, 'ub', 20);
    rec_rej_DL(rep, :) = (theta_grid < test_DL.lowerCI) | (theta_grid > test_DL.upperCI);
    rec_rtime_DL(rep) = test_DL.rtime;

    % AR test (homoscedasticity assumed)
    Tstart = tic();
    P_Z = dz / (dz' * dz) * dz';
    M_Z = eye(size(z,1)) - P_Z;
    AR_stat = zeros(length(theta_grid), 1);
    for i = 1:length(theta_grid)
        AR_stat(i) = (dy - dx * theta_grid(i))' * P_Z * (dy - dx * theta_grid(i)) ./ ((dy - dx * theta_grid(i))' * M_Z * (dy - dx * theta_grid(i)) / (size(z,1) - size(z,2)));
    end
    rec_rej_AR(rep, :) = (AR_stat > cv_AR);
    Tend = toc(Tstart);
    rec_rtime_AR(rep) = Tend;

    % 2SLS
    Tstart = tic();
    theta_2sls = (dx' * P_Z * dx) \ (dx' * P_Z * dy);
    ehat = dy - dx * theta_2sls;
    Vhat = (dx' * P_Z * dx) \ (dx' * P_Z * diag(ehat.^2) * P_Z * dx) / (dx' * P_Z * dx);  % Heteroscedasticity-consistent variance
    se = sqrt(Vhat);
    lowerCI = theta_2sls - se * norminv(1-alpha_level/2);
    upperCI = theta_2sls + se * norminv(1-alpha_level/2);
    rec_rej_2SLS(rep, :) = (theta_grid < lowerCI) | (theta_grid > upperCI);
    Tend = toc(Tstart);
    rec_rtime_2SLS(rep) = Tend;
end

opt_pb_6IV = mean(rec_opt_rej);
unpenalized_pv = squeeze(rec_bspv(:,end,:));
unp_pb_6IV  = mean(unpenalized_pv <= .1);
unp_pb_adj = mean(unpenalized_pv <= .1) - unp_pb_6IV(1) + .1;
unp_pb_adj_6IV = max(min(unp_pb_adj,1),0);
DL_6IV = mean(rec_rej_DL);
AR_6IV = mean(rec_rej_AR);
TSLS_6IV = mean(rec_rej_2SLS);

fprintf('Under Linear Specification:\n')
fprintf('1. Sizes using 4 IVs\n')
fprintf('Optimal\tUnpenalized\tUnpen.w/shift\t2SLS\tAR\tDL\n')
fprintf('%.3f\t%.3f\t\t%.3f\t\t%.3f\t%.3f\t%.3f\n', [opt_pb_4IV(1) unp_pb_4IV(1) unp_pb_adj_4IV(1) TSLS_4IV(1) AR_4IV(1) DL_4IV(1)])
fprintf('2. Sizes using 6 IVs\n')
fprintf('Optimal\tUnpenalized\tUnpen.w/shift\t2SLS\tAR\tDL\n')
fprintf('%.3f\t%.3f\t\t%.3f\t\t%.3f\t%.3f\t%.3f\n', [opt_pb_6IV(1) unp_pb_6IV(1) unp_pb_adj_6IV(1) TSLS_6IV(1) AR_6IV(1) DL_6IV(1)])
fprintf('3. Powers using 4 IVs\n')
fprintf('Optimal\tUnpenalized\tUnpen.w/shift\t2SLS\tAR\tDL\n')
fprintf('%.3f\t%.3f\t\t%.3f\t\t%.3f\t%.3f\t%.3f\n', [opt_pb_4IV(2) unp_pb_4IV(2) unp_pb_adj_4IV(2) TSLS_4IV(2) AR_4IV(2) DL_4IV(2)])
fprintf('4. Powers using 6 IVs\n')
fprintf('Optimal\tUnpenalized\tUnpen.w/shift\t2SLS\tAR\tDL\n')
fprintf('%.3f\t%.3f\t\t%.3f\t\t%.3f\t%.3f\t%.3f\n', [opt_pb_6IV(2) unp_pb_6IV(2) unp_pb_adj_6IV(2) TSLS_6IV(2) AR_6IV(2) DL_6IV(2)])
fprintf('5. Difference\n')
fprintf('Optimal\tUnpenalized\tUnpen.w/shift\t2SLS\tAR\tDL\n')
fprintf('%.3f\t%.3f\t\t%.3f\t\t%.3f\t%.3f\t%.3f\n', [opt_pb_4IV(2) unp_pb_4IV(2) unp_pb_adj_4IV(2) TSLS_4IV(2) AR_4IV(2) DL_4IV(2)]- ...
                                                                    [opt_pb_6IV(2) unp_pb_6IV(2) unp_pb_adj_6IV(2) TSLS_6IV(2) AR_6IV(2) DL_6IV(2)])